import numpy as np
import pandas as pd
import scipy.special as sp
from scipy.stats import norm, beta, gamma, invgamma
import scipy.stats as st
from scipy.optimize import fsolve
import arviz as az
import graphviz
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import pymc_bart as pmb
import xarray as xr
import operator
import pytensor.tensor as pt
from operator import itemgetter
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from itertools import chain
from IPython.display import display, Markdown
import warnings
# NOTE(review): this silences *all* warnings, including pandas/PyMC
# deprecation notices — consider narrowing the filter.
warnings.filterwarnings("ignore")
%matplotlib inline
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = [10, 6]
plt.rcParams["figure.dpi"] = 100
plt.rcParams["figure.facecolor"] = "white"
# Shared categorical palette used by the plotting helpers below.
sns_c = sns.color_palette(palette='tab10')
# Daily Dataset from: https://archive.ics.uci.edu/dataset/275/bike+sharing+dataset
df = pd.read_csv('day.csv').drop('instant', axis = 1)
df.rename(columns={"dteday":"date", "cnt":"rentals"}, inplace = True)
df['date'] = pd.to_datetime(df['date'])
# Constant column so the feature matrix carries an explicit intercept term.
df['intercept'] = 1.0
# Day-over-day temperature change (first row is NaN by construction).
df['temp_diffs'] = df['temp'].diff()
# Need to scale Rentals
# The fitted scaler is kept module-level so posterior-predictive draws can be
# inverse-transformed back to raw rental counts later.
target_scaler = MinMaxScaler()
rentals = df['rentals'].to_numpy()
rentals_scaled = target_scaler.fit_transform(X=rentals.reshape(-1,1)).flatten()
features = [
    'intercept', 'holiday', 'workingday',
    'weathersit','temp', 'hum', 'windspeed',
    'season'
]
X = df.loc[:, features].to_numpy(copy=True)
Y = df.loc[:, 'rentals'].to_numpy(copy = True)
# Time-ordered split: with n_splits=2 the split() generator yields exactly two
# (train, test) index pairs; the unpacking below keeps only the second
# (largest training window).
tscv = TimeSeriesSplit(n_splits = 2, gap = 0, test_size = 5)
# Create Train/Test Splits
_, (train_idx, test_idx) = tscv.split(X)
X_train, X_test = X[train_idx, :], X[test_idx]
Y_train, Y_test = Y[train_idx], Y[test_idx]
assert len(X_train) == len(Y_train)
assert len(X_test) == len(Y_test)
# n = Number of rows and p = number of predictors including intercept
n, p = X.shape
print(f"Number of Rows: {n} | Number of Predictors: {p}")
print()
# Notebook display of the modeling columns plus the engineered temp_diffs.
df.loc[:, features + ['temp_diffs', 'rentals']]
Number of Rows: 731 | Number of Predictors: 8
| intercept | holiday | workingday | weathersit | temp | hum | windspeed | season | temp_diffs | rentals | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0 | 0 | 2 | 0.344167 | 0.805833 | 0.160446 | 1 | NaN | 985 |
| 1 | 1.0 | 0 | 0 | 2 | 0.363478 | 0.696087 | 0.248539 | 1 | 0.019311 | 801 |
| 2 | 1.0 | 0 | 1 | 1 | 0.196364 | 0.437273 | 0.248309 | 1 | -0.167114 | 1349 |
| 3 | 1.0 | 0 | 1 | 1 | 0.200000 | 0.590435 | 0.160296 | 1 | 0.003636 | 1562 |
| 4 | 1.0 | 0 | 1 | 1 | 0.226957 | 0.436957 | 0.186900 | 1 | 0.026957 | 1600 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | 1.0 | 0 | 1 | 2 | 0.254167 | 0.652917 | 0.350133 | 1 | 0.010834 | 2114 |
| 727 | 1.0 | 0 | 1 | 2 | 0.253333 | 0.590000 | 0.155471 | 1 | -0.000834 | 3095 |
| 728 | 1.0 | 0 | 0 | 2 | 0.253333 | 0.752917 | 0.124383 | 1 | 0.000000 | 1341 |
| 729 | 1.0 | 0 | 0 | 1 | 0.255833 | 0.483333 | 0.350754 | 1 | 0.002500 | 1796 |
| 730 | 1.0 | 0 | 1 | 2 | 0.215833 | 0.577500 | 0.154846 | 1 | -0.040000 | 2729 |
731 rows × 10 columns
# Summary statistics for the model features and the raw target.
df.loc[:, features + ['rentals']].describe().round(2)
| intercept | holiday | workingday | weathersit | temp | hum | windspeed | season | rentals | |
|---|---|---|---|---|---|---|---|---|---|
| count | 731.0 | 731.00 | 731.00 | 731.00 | 731.00 | 731.00 | 731.00 | 731.00 | 731.00 |
| mean | 1.0 | 0.03 | 0.68 | 1.40 | 0.50 | 0.63 | 0.19 | 2.50 | 4504.35 |
| std | 0.0 | 0.17 | 0.47 | 0.54 | 0.18 | 0.14 | 0.08 | 1.11 | 1937.21 |
| min | 1.0 | 0.00 | 0.00 | 1.00 | 0.06 | 0.00 | 0.02 | 1.00 | 22.00 |
| 25% | 1.0 | 0.00 | 0.00 | 1.00 | 0.34 | 0.52 | 0.13 | 2.00 | 3152.00 |
| 50% | 1.0 | 0.00 | 1.00 | 1.00 | 0.50 | 0.63 | 0.18 | 3.00 | 4548.00 |
| 75% | 1.0 | 0.00 | 1.00 | 2.00 | 0.66 | 0.73 | 0.23 | 3.00 | 5956.00 |
| max | 1.0 | 1.00 | 1.00 | 3.00 | 0.86 | 0.97 | 0.51 | 4.00 | 8714.00 |
# Side-by-side KDEs of the raw and min-max scaled rental counts.
fig, (ax_raw, ax_scaled) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))
sns.kdeplot(x='rentals', data=df, fill=True, color='blue', ax=ax_raw)
ax_raw.set_title('Kernel Density: Bike Rentals')
sns.kdeplot(data=rentals_scaled, fill=True, color='blue', ax=ax_scaled)
ax_scaled.set_title('Kernel Density: Scaled Bike Rentals')
fig.suptitle('Bike Rentals', y=1);
def plot_numeric_features(numeric_features, target, save_fig = True):
    """Line-plot the target series and each numeric feature over time.

    Reads the module-level ``df`` (must contain 'date', `target`, and every
    entry of `numeric_features`) and the shared ``sns_c`` palette.
    Saves the figure to 'numeric_features.png' when `save_fig` is True.
    """
    panel_count = len(numeric_features) + 1
    fig, axes = plt.subplots(
        nrows=panel_count,
        ncols=1,
        figsize=(15, 12),
        constrained_layout=True,
    )
    # Top panel: the target itself.
    sns.lineplot(x='date', y=target, data=df, color='red', ax=axes[0])
    axes[0].set(title=target, ylabel='Rentals')
    # One panel per numeric feature, cycling through the palette.
    for idx, column in enumerate(numeric_features):
        panel = axes[idx + 1]
        sns.lineplot(x='date', y=column, data=df, color=sns_c[idx], ax=panel)
        panel.set(title=column, ylabel='Normalized Value')
    if save_fig:
        plt.savefig('numeric_features.png')
    plt.show()
# Numeric columns to visualize against the rentals time series.
numeric_features = [
    'temp',
    'temp_diffs',
    'hum',
    'windspeed',
]
plot_numeric_features(numeric_features, 'rentals', save_fig = True)
def plot_cat_features(df, cat_cols, y, save_fig = True):
    """Bar-plot the mean of `y` for each level of every categorical feature.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data containing `y` and every column in `cat_cols`.
    cat_cols : list[str]
        Categorical column names; one subplot per column.
    y : str
        Target column whose per-level mean is plotted.
    save_fig : bool
        If True, save the figure to 'cat_features.png'.
    """
    fig, axes = plt.subplots(
        nrows=len(cat_cols),  # was hard-coded to 3; one row per feature
        ncols=1,
        figsize=(10, 8),
        constrained_layout=True
    )
    # atleast_1d keeps indexing valid even for a single categorical column.
    axes = np.atleast_1d(axes).flatten()
    for i, feature in enumerate(cat_cols):
        ax = axes[i]
        # Mean target per category level, sorted for a clean bar order.
        # 'mean' (string) instead of np.mean: passing numpy callables to
        # DataFrame.agg is deprecated in recent pandas.
        feature_df = df \
            .groupby(feature, as_index=False) \
            .agg({y: 'mean'}) \
            .sort_values(y)
        sns.barplot(x=feature, y=y, data=feature_df, dodge=False, ax=ax)
        ax.set(title=feature, ylabel=None)
    fig.suptitle('Average # of Bike Rentals over Categorical Features')
    if save_fig:
        plt.savefig('cat_features.png')
    plt.show()
# Categorical columns to compare via mean rentals per level.
cat_features = [
    'holiday',
    'workingday',
    'weathersit',
]
plot_cat_features(df, cat_features, 'rentals', save_fig = True)
# Pull model inputs out of the DataFrame as plain numpy arrays.
date = df['date'].to_numpy()
intercept = df['intercept'].to_numpy()
temp = df['temp'].to_numpy()
hum = df['hum'].to_numpy()
windspeed = df['windspeed'].to_numpy()
# Categorical Variables - Needed for ZeroSumNormal Dist
# factorize() returns (integer codes, unique levels): the codes index the
# per-level coefficient vectors, the levels become model coords.
holiday_idx, holiday = df['holiday'].factorize()
workingday_idx, workingday = df['workingday'].factorize()
weathersit_idx, weathersit = df['weathersit'].factorize()
# Categorical Dist Probabilities
# 12 Holidays: https://dchr.dc.gov/page/holiday-schedules
p_holiday = 12/365
# Weekdays / # of Days in Week
p_working_day = 5/7
# More informative prior based on typical Washington Data
p_weathersit = [6/10, 3/10, 1/10]
# Baseline model: scaled rentals ~ StudentT(intercept + beta_temp * temp).
rng = np.random.default_rng(42)
coords = {
    "date": date,
}
with pm.Model(coords=coords) as base_model:
    # Intercept Prior
    # NOTE(review): this rebinds the module-level `intercept` numpy array
    # (built from df['intercept'] above) to a PyMC variable of the same
    # name — confirm the array is not needed afterwards.
    intercept = pm.Normal(name="intercept", mu = 0, sigma = 1)
    # Beta Coefficient Priors (Informative) - Numeric
    # Temp: https://www.weather.gov/media/lwx/climate/dcatemps.pdf
    beta_temp = pm.Normal("beta_temp", mu = 0.44, sigma = 0.2)
    # Likelihood precision/variance priors
    nu = pm.Gamma("nu", alpha = 8, beta = 2)
    sigma = pm.HalfNormal("sigma", sigma = 1)
    # Regression Function
    mu = pm.Deterministic("mu", var = intercept + beta_temp * temp, dims="date")
    # Likelihood
    # Use StudentT over Normal since it handles small samples better than Normal Dist
    likelihood = pm.StudentT(
        name="likelihood",
        mu = mu,
        nu = nu,
        sigma = sigma,
        observed = rentals_scaled,
        dims="date",
    )
    base_trace = pm.sample(
        draws = 10000,
        chains = 4,
        tune = 2000,
        cores = 4,
        # log_likelihood is required for the az.compare/LOO step later.
        idata_kwargs = {"log_likelihood": True},
        random_seed = rng,
    )
    prior_pred_base = pm.sample_prior_predictive(samples = 1000, random_seed = rng)
    ppc_base = pm.sample_posterior_predictive(
        trace = base_trace, random_seed = rng, extend_inferencedata = True,
    )
# Posterior summary, excluding the per-date `mu` vector.
az.summary(base_trace, hdi_prob = 0.95, var_names = ["~mu"], round_to = 3)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, beta_temp, nu, sigma]
Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 47 seconds. Sampling: [beta_temp, intercept, likelihood, nu, sigma] Sampling: [likelihood]
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | 0.134 | 0.018 | 0.098 | 0.169 | 0.000 | 0.000 | 20991.846 | 23310.367 | 1.0 |
| beta_temp | 0.765 | 0.035 | 0.697 | 0.833 | 0.000 | 0.000 | 20779.664 | 23287.662 | 1.0 |
| nu | 10.521 | 1.735 | 7.289 | 13.976 | 0.011 | 0.008 | 26722.969 | 26427.229 | 1.0 |
| sigma | 0.163 | 0.005 | 0.153 | 0.173 | 0.000 | 0.000 | 26790.736 | 25262.860 | 1.0 |
def plot_prior_predictive(ppc, title, model_name, save_fig = True):
    """Plot prior-predictive percentile bands against the observed series.

    Draws nested fill-between bands over the 51st-99th percentiles (and
    their mirrored lower tails) of the prior predictive 'likelihood'
    samples, then overlays the scaled rentals series.

    Parameters
    ----------
    ppc : arviz.InferenceData
        Must carry a `prior_predictive` group with a 'likelihood' variable.
    title : str
        Figure title prefix.
    model_name : str
        File name used when saving the figure.
    save_fig : bool
        Save the figure when True.
    """
    # Code referenced from: https://www.pymc.io/projects/examples/en/latest/time_series/Forecasting_with_structural_timeseries.html
    palette = "plasma"
    cmap = plt.get_cmap(palette)
    percs = np.linspace(51, 99, 100)
    # Map each percentile to a 0-1 colormap position.
    colors = (percs - np.min(percs)) / (np.max(percs) - np.min(percs))
    fig, ax = plt.subplots(figsize=(10, 4))
    for i, p in enumerate(percs[::-1]):
        # Symmetric band: p-th percentile above, (100 - p)-th below.
        upper = np.percentile(ppc.prior_predictive["likelihood"], p, axis = 1)
        lower = np.percentile(ppc.prior_predictive["likelihood"], 100 - p, axis = 1)
        color_val = colors[i]
        ax.fill_between(
            x=date,
            y1=upper.flatten(),
            y2=lower.flatten(),
            color=cmap(color_val),
            alpha=0.1,
        )
    sns.lineplot(x=date, y=rentals_scaled, color="k", ax=ax, label="Rentals Scaled")
    ax.set(title=f"{title} - Prior Predictive Fit", xlabel="date", ylabel="Scaled Bike rentals")
    ax.legend()
    plt.tight_layout()
    if save_fig:
        fig.savefig(f'prior_predictive_{model_name}')
    # BUG FIX: was `plt.plot()`, which draws nothing; show the figure instead.
    plt.show()
plot_prior_predictive(prior_pred_base, title = 'Base Model: Intercept + Temperature', model_name = 'baseline_lin_reg_0.png', save_fig = True)
def plot_posterior_predictive(ppc, title, model_name, save_fig = True):
    """Plot posterior-predictive percentile bands, the predictive mean, and
    the observed rentals, all on the original (unscaled) count scale.
    """
    # Code referenced from: https://www.pymc.io/projects/examples/en/latest/time_series/Forecasting_with_structural_timeseries.html
    cmap = plt.get_cmap("Wistia")
    percs = np.linspace(51, 99, 100)
    colors = (percs - np.min(percs)) / (np.max(percs) - np.min(percs))
    # Collapse (chain, draw) into a single sample dimension.
    samples = ppc.posterior_predictive["likelihood"].stack(sample=("chain", "draw"))
    # Need to unscale our rentals data to get a proper view for posterior fit
    samples_unscaled = target_scaler.inverse_transform(X=samples.to_numpy())
    fig, ax = plt.subplots(figsize=(12, 6))
    for rank, perc in enumerate(percs[::-1]):
        # Symmetric band around the median: perc above, (100 - perc) below.
        hi = np.percentile(samples_unscaled, perc, axis=1)
        lo = np.percentile(samples_unscaled, 100 - perc, axis=1)
        ax.fill_between(
            x=date,
            y1=hi,
            y2=lo,
            color=cmap(colors[rank]),
            alpha=0.05,
        )
    sns.lineplot(
        x=date,
        y=samples_unscaled.mean(axis=1),
        color="blue",
        label="posterior predictive mean",
        ax=ax,
    )
    sns.lineplot(x=date, y=rentals, color="black", label="rentals", ax=ax)
    ax.legend(loc="upper left")
    ax.set(
        title=f"{title} - Posterior Predictive Fit",
        xlabel="date",
        ylabel="rentals",
    )
    plt.tight_layout()
    if save_fig:
        fig.savefig(f'posterior_predictive_{model_name}')
    plt.show()
plot_posterior_predictive(ppc_base, 'Base Model: Intercept + Temperature', model_name = 'baseline_lin_reg_0.png', save_fig = True)
# Variant of the baseline model with a Normal likelihood, for comparison
# against the StudentT baseline above.
rng = np.random.default_rng(42)
coords = {
    "date": date,
}
with pm.Model(coords=coords) as base_model_normal:
    # Intercept Prior
    intercept = pm.Normal(name="intercept", mu = 0, sigma = 1)
    # Beta Coefficient Priors (Informative) - Numeric
    # Temp: https://www.weather.gov/media/lwx/climate/dcatemps.pdf
    beta_temp = pm.Normal("beta_temp", mu = 0.44, sigma = 0.2)
    # Likelihood precision/variance priors (no `nu` needed for a Normal)
    sigma = pm.HalfNormal("sigma", sigma = 1)
    # Regression Function
    mu = pm.Deterministic("mu", var = intercept + beta_temp * temp, dims="date")
    # Likelihood
    # Normal likelihood here — the original comment claiming StudentT was
    # copied over from the baseline model and did not match this code.
    likelihood = pm.Normal(
        name="likelihood",
        mu = mu,
        sigma = sigma,
        observed = rentals_scaled,
        dims="date",
    )
    base_trace_normal = pm.sample(
        draws = 10000,
        chains = 4,
        tune = 2000,
        cores = 4,
        # log_likelihood is required for the az.compare/LOO step later.
        idata_kwargs = {"log_likelihood": True},
        random_seed = rng,
    )
    prior_pred_base_normal = pm.sample_prior_predictive(samples = 1000, random_seed = rng)
    ppc_base_normal = pm.sample_posterior_predictive(
        trace = base_trace_normal, random_seed = rng, extend_inferencedata = True,
    )
# Posterior summary, excluding the per-date `mu` vector.
az.summary(base_trace_normal, hdi_prob = 0.95, var_names = ["~mu"], round_to = 3)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, beta_temp, sigma]
Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 34 seconds. Sampling: [beta_temp, intercept, likelihood, sigma] Sampling: [likelihood]
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | 0.142 | 0.018 | 0.106 | 0.178 | 0.0 | 0.0 | 13586.162 | 16185.476 | 1.0 |
| beta_temp | 0.755 | 0.035 | 0.688 | 0.823 | 0.0 | 0.0 | 13729.422 | 16303.977 | 1.0 |
| sigma | 0.174 | 0.005 | 0.165 | 0.183 | 0.0 | 0.0 | 22226.736 | 21269.382 | 1.0 |
# Prior/posterior predictive fit plots for the Normal-likelihood baseline.
plot_prior_predictive(prior_pred_base_normal, title = 'Base Model: Intercept + Temperature (Normal Likelihood)', model_name = 'baseline_lin_reg_1_normal.png', save_fig = True)
plot_posterior_predictive(ppc_base_normal, 'Base Model: Intercept + Temperature (Normal Likelihood)', model_name = 'baseline_lin_reg_1_normal.png', save_fig = True)
# Mean response vs. observed data with a 95% HDI band over temperature.
# NOTE(review): `post` comes from the StudentT baseline (ppc_base) while
# `lik` comes from the Normal baseline (base_trace_normal) — mixing two
# models in one figure looks unintentional; confirm which trace is meant.
post = ppc_base.posterior
mu_pp = post["intercept"] + post["beta_temp"] * xr.DataArray(temp, dims=["date"])
lik = base_trace_normal.posterior_predictive["likelihood"]
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(x=temp, y=mu_pp.mean(("chain", "draw")), label="Posterior Predictive Mean Response", color="blue", alpha=0.6, ax = ax)
sns.scatterplot(x=temp, y=rentals_scaled, label = "Observed Data", ax = ax)
# NOTE(review): "$95\%$ HDI" contains an invalid escape sequence (\%) in a
# non-raw string — prefer a raw string literal r"$95\%$ HDI".
az.plot_hdi(temp, lik, hdi_prob = 0.95, fill_kwargs={'alpha': 0.1, "label" : "$95\%$ HDI"}, ax = ax)
plt.xlabel("Temperature (scaled)")
plt.ylabel("Bike Rentals (scaled)")
plt.title('Mean Response Vs. Observed Data With 95% HDI')
plt.tight_layout()
save_fig = True
if save_fig:
    plt.savefig('mean_response_baseline_lin_reg_0.png')
plt.show()
# Full model: all features, unconstrained Normal priors on the categorical
# coefficients.
rng = np.random.default_rng(42)
coords = {
    "date": date,
    "workingday": workingday,
    "weathersit": weathersit,
    "holiday": holiday,
}
with pm.Model(coords=coords) as full_model_1:
    # Intercept Prior
    intercept = pm.Normal(name="intercept", mu = 0, sigma = 1)
    # Beta Coefficient Priors (Informative) - Numeric
    # Temp: https://www.weather.gov/media/lwx/climate/dcatemps.pdf
    beta_temp = pm.Normal("beta_temp", mu = 0.5, sigma = 0.2)
    # Humidity: https://www.worlddata.info/america/usa/climate-washington-d-c.php
    beta_hum = pm.Normal("beta_hum", mu = 0.5, sigma = 0.1)
    # https://weatherspark.com/y/20957/Average-Weather-in-Washington-D.C.;-United-States-Year-Round#Sections-Wind
    beta_windspeed = pm.Normal("beta_windspeed", mu = 0.2, sigma = 0.2)
    # Beta Coefficient Priors - Categorical
    # NOTE(review): a free coefficient per category level *plus* an
    # intercept is over-parameterized (only differences between levels are
    # identified); the ZeroSumNormal variant below addresses this.
    beta_holiday = pm.Normal("beta_holiday", mu = 0, sigma=1, shape = len(df['holiday'].unique()))
    beta_workingday = pm.Normal("beta_workingday", mu = 0, sigma=1, shape = len(df['workingday'].unique()))
    beta_weathersit = pm.Normal("beta_weathersit", mu = 0, sigma=1, shape = len(df['weathersit'].unique()))
    # Likelihood precision/variance priors
    nu = pm.Gamma("nu", alpha = 2, beta = 0.1)
    sigma = pm.HalfNormal("sigma", sigma = 1)
    # Regression Function
    mu = pm.Deterministic(
        "mu",
        var = (
            intercept
            + beta_temp * temp
            + beta_hum * hum
            + beta_windspeed * windspeed
            + beta_holiday[holiday_idx]
            + beta_workingday[workingday_idx]
            + beta_weathersit[weathersit_idx]
        ),
        dims="date",
    )
    # Likelihood
    # Use StudentT over Normal since it supposedly handles small samples better than Normal Dist
    likelihood = pm.StudentT(
        name="likelihood",
        mu = mu,
        nu = nu,
        sigma = sigma,
        observed = rentals_scaled,
        dims="date",
    )
    full_trace_1 = pm.sample(
        draws = 5000,
        chains = 4,
        tune = 2000,
        cores = 4,
        # target_accept=0.95,
        idata_kwargs = {"log_likelihood": True},
        random_seed = rng,
    )
    prior_pred_full_1 = pm.sample_prior_predictive(samples = 1000, random_seed = rng)
    ppc_full_1 = pm.sample_posterior_predictive(
        trace = full_trace_1, random_seed = rng, extend_inferencedata = True,
    )
# Posterior summary, excluding the per-date `mu` vector.
az.summary(full_trace_1, hdi_prob = 0.95, var_names = ["~mu"], round_to = 3)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, beta_temp, beta_hum, beta_windspeed, beta_holiday, beta_workingday, beta_weathersit, nu, sigma]
Sampling 4 chains for 2_000 tune and 5_000 draw iterations (8_000 + 20_000 draws total) took 1204 seconds. Sampling: [beta_holiday, beta_hum, beta_temp, beta_weathersit, beta_windspeed, beta_workingday, intercept, likelihood, nu, sigma] Sampling: [likelihood]
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | 0.058 | 0.760 | -1.409 | 1.554 | 0.007 | 0.005 | 12956.439 | 13142.530 | 1.0 |
| beta_temp | 0.711 | 0.034 | 0.646 | 0.777 | 0.000 | 0.000 | 26025.263 | 15443.495 | 1.0 |
| beta_hum | -0.027 | 0.050 | -0.128 | 0.066 | 0.000 | 0.000 | 21198.777 | 15330.516 | 1.0 |
| beta_windspeed | -0.293 | 0.076 | -0.444 | -0.146 | 0.001 | 0.000 | 22736.322 | 15271.655 | 1.0 |
| beta_holiday[0] | 0.048 | 0.627 | -1.161 | 1.270 | 0.006 | 0.004 | 10725.708 | 11805.765 | 1.0 |
| beta_holiday[1] | -0.022 | 0.627 | -1.266 | 1.165 | 0.006 | 0.004 | 10780.999 | 11790.271 | 1.0 |
| beta_workingday[0] | 0.025 | 0.623 | -1.195 | 1.258 | 0.006 | 0.004 | 10998.951 | 12716.565 | 1.0 |
| beta_workingday[1] | 0.040 | 0.623 | -1.200 | 1.255 | 0.006 | 0.004 | 11008.971 | 12465.386 | 1.0 |
| beta_weathersit[0] | 0.061 | 0.538 | -0.969 | 1.142 | 0.006 | 0.004 | 8265.035 | 11263.958 | 1.0 |
| beta_weathersit[1] | 0.127 | 0.538 | -0.910 | 1.196 | 0.006 | 0.004 | 8254.195 | 11106.057 | 1.0 |
| beta_weathersit[2] | -0.151 | 0.538 | -1.173 | 0.939 | 0.006 | 0.004 | 8304.970 | 11319.666 | 1.0 |
| nu | 47.849 | 17.373 | 19.226 | 82.539 | 0.109 | 0.085 | 26742.218 | 14261.577 | 1.0 |
| sigma | 0.160 | 0.004 | 0.151 | 0.168 | 0.000 | 0.000 | 23120.135 | 14653.949 | 1.0 |
# Duplicate of the summary above (cell kept around for LaTeX export).
az.summary(full_trace_1, hdi_prob = 0.95, var_names = ["~mu"], round_to = 3)
#.to_latex()
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | 0.058 | 0.760 | -1.409 | 1.554 | 0.007 | 0.005 | 12956.439 | 13142.530 | 1.0 |
| beta_temp | 0.711 | 0.034 | 0.646 | 0.777 | 0.000 | 0.000 | 26025.263 | 15443.495 | 1.0 |
| beta_hum | -0.027 | 0.050 | -0.128 | 0.066 | 0.000 | 0.000 | 21198.777 | 15330.516 | 1.0 |
| beta_windspeed | -0.293 | 0.076 | -0.444 | -0.146 | 0.001 | 0.000 | 22736.322 | 15271.655 | 1.0 |
| beta_holiday[0] | 0.048 | 0.627 | -1.161 | 1.270 | 0.006 | 0.004 | 10725.708 | 11805.765 | 1.0 |
| beta_holiday[1] | -0.022 | 0.627 | -1.266 | 1.165 | 0.006 | 0.004 | 10780.999 | 11790.271 | 1.0 |
| beta_workingday[0] | 0.025 | 0.623 | -1.195 | 1.258 | 0.006 | 0.004 | 10998.951 | 12716.565 | 1.0 |
| beta_workingday[1] | 0.040 | 0.623 | -1.200 | 1.255 | 0.006 | 0.004 | 11008.971 | 12465.386 | 1.0 |
| beta_weathersit[0] | 0.061 | 0.538 | -0.969 | 1.142 | 0.006 | 0.004 | 8265.035 | 11263.958 | 1.0 |
| beta_weathersit[1] | 0.127 | 0.538 | -0.910 | 1.196 | 0.006 | 0.004 | 8254.195 | 11106.057 | 1.0 |
| beta_weathersit[2] | -0.151 | 0.538 | -1.173 | 0.939 | 0.006 | 0.004 | 8304.970 | 11319.666 | 1.0 |
| nu | 47.849 | 17.373 | 19.226 | 82.539 | 0.109 | 0.085 | 26742.218 | 14261.577 | 1.0 |
| sigma | 0.160 | 0.004 | 0.151 | 0.168 | 0.000 | 0.000 | 23120.135 | 14653.949 | 1.0 |
# Diagnostics for the unconstrained full model: prior predictive bands,
# cumulative posterior-predictive check, and posterior predictive fit.
plot_prior_predictive(prior_pred_full_1, title = 'Full Model: All Features With Normal Uninformative Priors', model_name = 'full_lin_reg_0_norm', save_fig = True)
az.plot_ppc(full_trace_1, kind="cumulative", num_pp_samples=1000)
plt.show()
plot_posterior_predictive(ppc_full_1, 'Full Model: All Features With Normal Uninformative Priors', model_name = 'full_lin_reg_0_norm', save_fig = True)
# Full model, take 2: categorical coefficients constrained with
# ZeroSumNormal priors so they are identifiable alongside the intercept.
rng = np.random.default_rng(42)
coords = {
    "date": date,
    "workingday": workingday,
    "weathersit": weathersit,
    "holiday": holiday,
}
with pm.Model(coords=coords) as full_model_2:
    # Intercept Prior
    intercept = pm.Normal(name="intercept", mu = 0, sigma = 1)
    # Beta Coefficient Priors (Informative) - Numeric
    # Temp: https://www.weather.gov/media/lwx/climate/dcatemps.pdf
    beta_temp = pm.Normal("beta_temp", mu = 0.5, sigma = 0.2)
    # Humidity: https://www.worlddata.info/america/usa/climate-washington-d-c.php
    beta_hum = pm.Normal("beta_hum", mu = 0.5, sigma = 0.1)
    # https://weatherspark.com/y/20957/Average-Weather-in-Washington-D.C.;-United-States-Year-Round#Sections-Wind
    beta_windspeed = pm.Normal("beta_windspeed", mu = 0.2, sigma = 0.2)
    # Beta Coefficient Priors - Categorical
    # Each coefficient vector sums to zero across its levels, removing the
    # intercept/category-offset redundancy of full_model_1.
    beta_holiday = pm.ZeroSumNormal("beta_holiday", sigma=1, dims="holiday")
    beta_workingday = pm.ZeroSumNormal("beta_workingday", sigma=1, dims="workingday")
    beta_weathersit = pm.ZeroSumNormal("beta_weathersit", sigma=1, dims="weathersit")
    # Likelihood precision/variance priors
    nu = pm.Gamma("nu", alpha = 2, beta = 0.1)
    sigma = pm.HalfNormal("sigma", sigma = 1)
    # Regression Function
    mu = pm.Deterministic(
        "mu",
        var = (
            intercept
            + beta_temp * temp
            + beta_hum * hum
            + beta_windspeed * windspeed
            + beta_holiday[holiday_idx]
            + beta_workingday[workingday_idx]
            + beta_weathersit[weathersit_idx]
        ),
        dims="date",
    )
    # Likelihood
    # Use StudentT over Normal since it handles small samples better than Normal Dist
    likelihood = pm.StudentT(
        name="likelihood",
        mu = mu,
        nu = nu,
        sigma = sigma,
        observed = rentals_scaled,
        dims="date",
    )
    full_trace_2 = pm.sample(
        draws = 10000,
        chains = 4,
        tune = 2000,
        cores = 4,
        # target_accept=0.95,
        idata_kwargs = {"log_likelihood": True},
        random_seed = rng,
    )
    prior_pred_full_2 = pm.sample_prior_predictive(samples = 1000, random_seed = rng)
    ppc_full_2 = pm.sample_posterior_predictive(
        trace = full_trace_2, random_seed = rng, extend_inferencedata = True,
    )
# az.summary(full_trace_2, hdi_prob = 0.95, var_names = ["~mu"], round_to = 2).to_latex(float_format="{:.2f}".format)
az.summary(full_trace_2, hdi_prob = 0.95, var_names = ["~mu"], round_to = 3)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, beta_temp, beta_hum, beta_windspeed, beta_holiday, beta_workingday, beta_weathersit, nu, sigma]
Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 177 seconds. Sampling: [beta_holiday, beta_hum, beta_temp, beta_weathersit, beta_windspeed, beta_workingday, intercept, likelihood, nu, sigma] Sampling: [likelihood]
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | 0.115 | 0.049 | 0.021 | 0.211 | 0.000 | 0.000 | 24358.682 | 26350.491 | 1.0 |
| beta_temp | 0.710 | 0.034 | 0.645 | 0.778 | 0.000 | 0.000 | 45202.443 | 28620.694 | 1.0 |
| beta_hum | -0.026 | 0.050 | -0.121 | 0.073 | 0.000 | 0.000 | 31758.058 | 29824.459 | 1.0 |
| beta_windspeed | -0.292 | 0.077 | -0.439 | -0.141 | 0.000 | 0.000 | 39393.951 | 30517.045 | 1.0 |
| beta_holiday[0] | 0.035 | 0.019 | -0.002 | 0.071 | 0.000 | 0.000 | 43967.064 | 28920.652 | 1.0 |
| beta_holiday[1] | -0.035 | 0.019 | -0.071 | 0.002 | 0.000 | 0.000 | 43967.064 | 28920.652 | 1.0 |
| beta_workingday[0] | -0.007 | 0.007 | -0.020 | 0.006 | 0.000 | 0.000 | 52162.214 | 29467.680 | 1.0 |
| beta_workingday[1] | 0.007 | 0.007 | -0.006 | 0.020 | 0.000 | 0.000 | 52162.214 | 29467.680 | 1.0 |
| beta_weathersit[2] | 0.049 | 0.014 | 0.021 | 0.076 | 0.000 | 0.000 | 37835.998 | 31485.796 | 1.0 |
| beta_weathersit[1] | 0.115 | 0.016 | 0.085 | 0.146 | 0.000 | 0.000 | 26606.156 | 29729.508 | 1.0 |
| beta_weathersit[3] | -0.164 | 0.025 | -0.212 | -0.113 | 0.000 | 0.000 | 27604.746 | 26454.208 | 1.0 |
| nu | 48.226 | 17.749 | 18.832 | 83.390 | 0.082 | 0.064 | 51046.917 | 29173.238 | 1.0 |
| sigma | 0.160 | 0.005 | 0.151 | 0.169 | 0.000 | 0.000 | 46896.264 | 28663.952 | 1.0 |
# Diagnostics for the ZeroSumNormal full model: prior predictive bands,
# cumulative posterior-predictive check, and posterior predictive fit.
plot_prior_predictive(prior_pred_full_2, title = 'Full Model: All Features With ZeroSumCategorical Priors', model_name = 'full_lin_reg_0_zsn', save_fig = True)
az.plot_ppc(full_trace_2, kind="cumulative", num_pp_samples=1000)
plt.show()
plot_posterior_predictive(ppc_full_2, 'Full Model: All Features With ZeroSumCategorical Priors', model_name = 'full_lin_reg_0_zsn', save_fig = True)
# Time-varying-coefficient model: beta_temp follows a Gaussian random walk
# over dates instead of being a single static coefficient.
rng = np.random.default_rng(42)
coords = {
    "date": date,
}
with pm.Model(coords=coords) as grw_model:
    # Intercept Prior
    # NOTE(review): `intercept` is defined but never added to `mu` below —
    # its posterior will simply return the prior; confirm whether dropping
    # it from the regression was intentional.
    intercept = pm.Normal(name="intercept", mu = 0, sigma = 1)
    # Beta Coefficient Priors (Informative) - Numeric
    # Temp: https://www.weather.gov/media/lwx/climate/dcatemps.pdf
    # beta_temp = pm.Normal("beta_temp", mu = 0.5, sigma = 0.2)
    # Humidity: https://www.worlddata.info/america/usa/climate-washington-d-c.php
    beta_hum = pm.Normal("beta_hum", mu = 0.5, sigma = 0.1)
    # https://weatherspark.com/y/20957/Average-Weather-in-Washington-D.C.;-United-States-Year-Round#Sections-Wind
    beta_windspeed = pm.Normal("beta_windspeed", mu = 0.2, sigma = 0.2)
    # Beta Coefficient Priors - Categorical
    beta_holiday = pm.ZeroSumNormal("beta_holiday", sigma=1, shape = 2) # 0/1 Binary
    beta_workingday = pm.ZeroSumNormal("beta_workingday", sigma=1, shape = 2) # 0/1 Binary
    beta_weathersit = pm.ZeroSumNormal("beta_weathersit", sigma=1, shape = 3) # 3 unique values
    # Likelihood precision/variance priors
    nu = pm.Gamma("nu", alpha = 2, beta = 0.1)
    sigma = pm.HalfNormal("sigma", sigma = 1)
    # Random Walk Priors
    # NOTE(review): a Normal prior allows step_size to go negative although
    # it is used as the random-walk sigma below; a HalfNormal would keep
    # the innovation scale positive.
    step_size = pm.Normal('step_size', mu = 0, sigma = 0.1)
    beta_temp = pm.GaussianRandomWalk(
        name="beta_temp",
        sigma=step_size, # How far can we deviate from the last value
        init_dist=pm.Normal.dist(mu = 0, sigma = 1),
        dims="date",
    )
    # Regression Function
    # One beta_temp entry per date: a time-varying temperature coefficient.
    mu = pm.Deterministic(
        "mu",
        var = (
            beta_temp * temp
            + beta_hum * hum
            + beta_windspeed * windspeed
            + beta_holiday[holiday_idx]
            + beta_workingday[workingday_idx]
            + beta_weathersit[weathersit_idx]
        ),
    )
    # Likelihood
    # Use StudentT over Normal since it handles small samples better than Normal Dist
    likelihood = pm.StudentT(
        name="likelihood",
        mu = mu,
        nu = nu,
        sigma = sigma,
        observed = rentals_scaled,
    )
    grw_trace = pm.sample(
        draws = 10000,
        chains = 4,
        tune = 2000,
        cores = 4,
        target_accept=0.95,
        idata_kwargs = {"log_likelihood": True},
        random_seed = rng,
    )
    # Prior predictive seems to break PyMC
    # grw_prior_trace = pm.sample_prior_predictive(samples = 1000, random_seed = rng)
    grw_ppc_base = pm.sample_posterior_predictive(
        trace = grw_trace, random_seed = rng, extend_inferencedata = True,
    )
# Posterior summary, excluding the long per-date mu and beta_temp vectors.
az.summary(grw_trace, hdi_prob = 0.95, var_names = ["~mu", "~beta_temp"], round_to = 2)
# .to_latex(float_format="{:.2f}".format)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, beta_hum, beta_windspeed, beta_holiday, beta_workingday, beta_weathersit, nu, sigma, step_size, beta_temp]
Sampling 4 chains for 2_000 tune and 10_000 draw iterations (8_000 + 40_000 draws total) took 720 seconds. Sampling: [likelihood]
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | 0.01 | 1.00 | -1.90 | 2.00 | 0.0 | 0.01 | 68313.45 | 27093.47 | 1.0 |
| beta_hum | -0.02 | 0.02 | -0.07 | 0.02 | 0.0 | 0.00 | 2196.25 | 6494.89 | 1.0 |
| beta_windspeed | -0.12 | 0.03 | -0.18 | -0.05 | 0.0 | 0.00 | 8540.76 | 22181.04 | 1.0 |
| step_size | 0.06 | 0.01 | 0.05 | 0.08 | 0.0 | 0.00 | 1538.21 | 3184.50 | 1.0 |
| beta_holiday[0] | 0.06 | 0.01 | 0.04 | 0.08 | 0.0 | 0.00 | 12452.50 | 23847.61 | 1.0 |
| beta_holiday[1] | -0.06 | 0.01 | -0.08 | -0.04 | 0.0 | 0.00 | 12452.50 | 23847.61 | 1.0 |
| beta_workingday[0] | -0.00 | 0.00 | -0.01 | 0.00 | 0.0 | 0.00 | 40569.27 | 32970.31 | 1.0 |
| beta_workingday[1] | 0.00 | 0.00 | -0.00 | 0.01 | 0.0 | 0.00 | 40569.27 | 32970.31 | 1.0 |
| beta_weathersit[0] | 0.05 | 0.01 | 0.04 | 0.07 | 0.0 | 0.00 | 33374.49 | 33344.24 | 1.0 |
| beta_weathersit[1] | 0.11 | 0.01 | 0.10 | 0.13 | 0.0 | 0.00 | 6173.08 | 16805.17 | 1.0 |
| beta_weathersit[2] | -0.17 | 0.01 | -0.19 | -0.14 | 0.0 | 0.00 | 11504.30 | 25434.36 | 1.0 |
| nu | 2.67 | 0.43 | 1.90 | 3.53 | 0.0 | 0.00 | 19169.62 | 30276.45 | 1.0 |
| sigma | 0.04 | 0.00 | 0.04 | 0.05 | 0.0 | 0.00 | 5341.62 | 14184.22 | 1.0 |
# Posterior predictive fit for the time-varying-coefficient model.
plot_posterior_predictive(grw_ppc_base, 'Gaussian Random Walk Model: All Features With ZeroSumCategorical Priors', model_name = 'grw_reg_all_feats_zsn', save_fig = True)
def rev_min_max_func(scaled_arr, min_val, max_val):
    """Invert min-max scaling: map values in [0, 1] back to [min_val, max_val].

    Works elementwise on scalars or numpy arrays.
    """
    value_range = max_val - min_val
    return scaled_arr * value_range + min_val
# Undo the min-max scaling of `temp`; -8 and 39 (°C) are assumed to be the
# bounds used by the UCI dataset's normalization — TODO confirm against the
# dataset documentation.
unscaled_temp = rev_min_max_func(temp, -8, 39)
# Per-date posterior draws of the time-varying temperature coefficient.
beta_coeffs = grw_trace['posterior']['beta_temp']
# Code referenced from: https://www.pymc.io/projects/examples/en/latest/generalized_linear_models/GLM-rolling-regression.html#analysis-of-results
fig, ax = plt.subplots(figsize=(12, 6))
az.plot_hdi(
    x=unscaled_temp,
    y=beta_coeffs,
    hdi_prob=0.95,
    color="C0",
    fill_kwargs={"alpha": 0.4, "label": "95% HDI"},
    ax=ax,
)
az.plot_hdi(
    x=unscaled_temp,
    y=beta_coeffs,
    hdi_prob=0.5,
    color="C0",
    fill_kwargs={"alpha": 0.6, "label": "50% HDI"},
    ax=ax,
)
# Reference line: the static beta_temp posterior mean from full_model_2.
ax.axhline(
    y=full_trace_2['posterior']['beta_temp'].mean(dim=["chain", "draw"]).item(),
    color="blue",
    linestyle="--",
    label="Linear Regression (All Features) Posterior Predictive Mean",
)
# Rug marks show where temperature observations actually fall.
sns.rugplot(x=unscaled_temp, color="black", alpha=0.5, ax=ax)
ax.axhline(y=0.0, color="gray", linestyle="--")
ax.legend(loc="upper right")
ax.set(
    title="Temperature Coefficient (Original Scale) - Posterior Predictive",
    xlabel="temp",
    ylabel="Beta Temperature",
)
plt.tight_layout()
save_fig = True
if save_fig:
    plt.savefig('gaussian_rw_temp_coeffs.png')
plt.show()
# Rank-bar trace plots for the GRW model, excluding the long per-date
# mu/beta_temp vectors.
axes = az.plot_trace(
    data=grw_trace,
    var_names=["~mu", "~beta_temp"],
    compact=True,
    kind="rank_bars",
    backend_kwargs={"figsize": (12, 15), "layout": "constrained"},
)
plt.gcf().suptitle("Gaussian Random Walk Model", fontsize=16)
plt.show()
# Compare all fitted models with PSIS-LOO (deviance scale: lower is better),
# using stacking weights.
models = {
    "baseline_student_t": base_trace,
    "baseline_normal": base_trace_normal,
    "full_model_norm": full_trace_1,
    "full_model_zsn": full_trace_2,
    "gaussian_rw": grw_trace,
}
az.compare(models, scale="deviance", method="stacking").round(3)
# az.compare(models, scale="deviance", method="stacking").to_latex(float_format="{:.3f}".format)
| rank | elpd_loo | p_loo | elpd_diff | weight | se | dse | warning | scale | |
|---|---|---|---|---|---|---|---|---|---|
| gaussian_rw | 0 | -1651.401 | 219.114 | 0.000 | 0.959 | 56.159 | 0.000 | False | deviance |
| full_model_norm | 1 | -571.490 | 7.552 | 1079.912 | 0.041 | 32.156 | 59.342 | False | deviance |
| full_model_zsn | 2 | -571.236 | 7.581 | 1080.165 | 0.000 | 32.142 | 59.326 | False | deviance |
| baseline_normal | 3 | -481.438 | 2.537 | 1169.963 | 0.000 | 32.504 | 58.355 | False | deviance |
| baseline_student_t | 4 | -458.049 | 2.673 | 1193.352 | 0.000 | 33.419 | 59.223 | False | deviance |
def compute_laud_ibrahim_criteria(trace, scaled_y):
    """ Code referenced from: https://areding.github.io/6420-pymc/unit9/Unit9-hald.html?highlight=ibrahim

    Laud-Ibrahim predictive criterion: the square root of the squared
    distance between the observed series and the posterior-predictive mean,
    plus the predictive mean's variance. Lower is better.
    """
    predictive_mean = az.summary(trace.posterior_predictive)["mean"].values
    squared_err = (scaled_y - predictive_mean) ** 2
    return np.sqrt(np.sum(squared_err, axis=0) + np.std(predictive_mean, axis=0) ** 2)
# Score every fitted model with the Laud-Ibrahim criterion (lower is better).
model_scores = dict(zip(models.keys(), [0] * len(models)))
for model_name, trace_ in models.items():
    model_scores[model_name] = compute_laud_ibrahim_criteria(trace_, rentals_scaled)
model_scores
{'baseline_student_t': 4.6919201801878,
'baseline_normal': 4.6920673536411135,
'full_model_norm': 4.363369420078653,
'full_model_zsn': 4.363611454140253,
'gaussian_rw': 1.9000069698310282}
# Bar chart of Laud-Ibrahim scores; the best (lowest) model is highlighted.
model_names = list(model_scores.keys())
scores = [model_scores[k] for k in model_names]
# Green for the minimum score, grey for everything else.
colors = ["grey" if m > np.min(scores) else "green" for m in scores]
# BUG FIX: was `plt.Figure(...)`, which builds a detached Figure that pyplot
# never draws on (the figsize was silently ignored and sns.barplot went to
# the current figure instead); plt.figure() creates *and activates* one.
fig = plt.figure(figsize=(7,4))
sns.barplot(x=model_names, y=scores, palette = colors)
plt.title('Model Comparison: Laud-Ibrahim Criteria')
plt.ylabel("Score")
save_fig = True
if save_fig:
    plt.savefig('model_comparison.png')
plt.show()
def compute_r2_score(ppc_trace, y):
    """Bayesian R² between observed `y` and the posterior-predictive draws."""
    stacked = ppc_trace.posterior_predictive.stack(sample=("chain", "draw"))
    draws = stacked["likelihood"].values.T
    return az.r2_score(y, draws).values[0].round(3)
# Posterior-predictive traces for each candidate model.
models_ppc = {
    "baseline_student_t": ppc_base,
    "baseline_normal": ppc_base_normal,
    "full_model_norm": ppc_full_1,
    "full_model_zsn": ppc_full_2,
    "gaussian_rw": grw_ppc_base
}
# Bayesian R² for every model against the scaled rental counts.
model_r2_scores = {
    name: compute_r2_score(ppc_, rentals_scaled)
    for name, ppc_ in models_ppc.items()
}
r2_df = pd.DataFrame.from_dict(model_r2_scores, orient='index', columns=['R2 Score']).T
# r2_df.to_latex(float_format="{:.3f}".format)
r2_df
| baseline_student_t | baseline_normal | full_model_norm | full_model_zsn | gaussian_rw | |
|---|---|---|---|---|---|
| R2 Score | 0.455 | 0.45 | 0.486 | 0.486 | 0.803 |
# Rank-bar trace diagnostics for the ZeroSumNormal full model; "~mu" excludes
# the (per-observation) deterministic mean from the plot, and compact=True
# overlays all chains per variable.
axes = az.plot_trace(
    data=full_trace_2,
    var_names=["~mu"],
    compact=True,
    kind="rank_bars",
    backend_kwargs={"figsize": (12, 15), "layout": "constrained"},
)
plt.gcf().suptitle("Full Model With ZeroSumCategorical Priors - Trace")
plt.show()
# Forest plot of the Gaussian-random-walk model's coefficient posteriors with
# 95% HDIs; "~" excludes the bulky mu/nu/sigma/beta_temp variables. r_hat and
# ess columns are shown as convergence diagnostics.
axes = az.plot_forest(
    data=grw_trace,
    var_names=["~mu", "~nu", "~sigma", "~beta_temp"],
    combined=True,
    r_hat=True,
    ess=True,
    figsize=(10, 6),
    hdi_prob = 0.95
)
# Reference line at zero: coefficients whose HDI crosses it are weakly identified.
axes[0].axvline(x=0.0, color="black", linestyle="--")
plt.gcf().suptitle("Gaussian Random Walk Model")
plt.show()
rng = np.random.default_rng(42)  # fixed seed for reproducible sampling below
# Coordinate labels for the PyMC model dims. NOTE(review): `date`,
# `workingday`, `weathersit`, and `holiday` are presumably index/label arrays
# built earlier in the notebook (not visible in this chunk) — confirm they
# match the *_idx arrays used in the model.
coords = {
    "date": date,
    "workingday": workingday,
    "weathersit": weathersit,
    "holiday": holiday,
}
# Negative Binomial GLM (log link) for daily rentals with informative numeric
# priors and zero-sum categorical effects.
with pm.Model(coords=coords) as nb_model:
    # Intercept Prior
    # NOTE(review): with raw counts in the thousands, exp(intercept) must reach
    # ~8+ on the log scale — N(0, 1) is likely too tight; consider widening.
    intercept = pm.Normal(name="intercept", mu = 0, sigma = 1)
    # Beta Coefficient Priors (Informative) - Numeric
    # Temp: https://www.weather.gov/media/lwx/climate/dcatemps.pdf
    beta_temp = pm.Normal("beta_temp", mu = 0.5, sigma = 0.2)
    # Humidity: https://www.worlddata.info/america/usa/climate-washington-d-c.php
    beta_hum = pm.Normal("beta_hum", mu = 0.5, sigma = 0.1)
    # https://weatherspark.com/y/20957/Average-Weather-in-Washington-D.C.;-United-States-Year-Round#Sections-Wind
    beta_windspeed = pm.Normal("beta_windspeed", mu = 0.2, sigma = .2)
    # Beta Coefficient Priors - Categorical (zero-sum keeps levels identifiable
    # alongside the intercept)
    beta_holiday = pm.ZeroSumNormal("beta_holiday", sigma=1, dims="holiday")
    beta_workingday = pm.ZeroSumNormal("beta_workingday", sigma=1, dims="workingday")
    beta_weathersit = pm.ZeroSumNormal("beta_weathersit", sigma=1, dims="weathersit")
    # Dispersion parameter of the Negative Binomial
    alpha = pm.Exponential("alpha", 10)
    # Log link: exponentiate the linear predictor to get the expected count.
    lambda_ = pm.math.exp(
        intercept
        + beta_temp * temp
        + beta_hum * hum
        + beta_windspeed * windspeed
        + beta_holiday[holiday_idx]
        + beta_workingday[workingday_idx]
        + beta_weathersit[weathersit_idx]
    )
    # Use NB distribution since data is quite dispersed.
    # BUG FIX: the Negative Binomial is a count distribution, so it must be fit
    # to the raw integer rental counts, not the MinMax-scaled target in [0, 1]
    # (the near-zero posterior for `alpha` was a symptom of that mismatch).
    likelihood = pm.NegativeBinomial("likelihood", mu = lambda_, alpha = alpha, observed = rentals, dims = "date")
    nb_trace = pm.sample(
        draws = 5000,
        chains = 4,
        tune = 1000,
        cores = 4,
        target_accept=0.95, # getting some divergences when not set
        idata_kwargs = {"log_likelihood": True},
        random_seed = rng,
    )
    prior_pred_nb = pm.sample_prior_predictive(samples = 1000, random_seed = rng)
    # Attach posterior-predictive draws to nb_trace for later PPC plots.
    ppc_nb = pm.sample_posterior_predictive(
        trace = nb_trace, random_seed = rng, extend_inferencedata = True,
    )
az.summary(nb_trace, hdi_prob = 0.95, var_names = ["~mu"], round_to = 3)
Auto-assigning NUTS sampler... Initializing NUTS using jitter+adapt_diag... Multiprocess sampling (4 chains in 4 jobs) NUTS: [intercept, beta_temp, beta_hum, beta_windspeed, beta_holiday, beta_workingday, beta_weathersit, alpha]
Sampling 4 chains for 1_000 tune and 5_000 draw iterations (4_000 + 20_000 draws total) took 49 seconds. Sampling: [alpha, beta_holiday, beta_hum, beta_temp, beta_weathersit, beta_windspeed, beta_workingday, intercept, likelihood] Sampling: [likelihood]
| mean | sd | hdi_2.5% | hdi_97.5% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat | |
|---|---|---|---|---|---|---|---|---|---|
| intercept | -0.356 | 1.041 | -2.415 | 1.658 | 0.007 | 0.007 | 21198.709 | 13557.411 | 1.0 |
| beta_temp | 0.491 | 0.200 | 0.107 | 0.890 | 0.001 | 0.001 | 24615.140 | 15856.004 | 1.0 |
| beta_hum | 0.498 | 0.100 | 0.308 | 0.695 | 0.001 | 0.000 | 28101.113 | 15475.412 | 1.0 |
| beta_windspeed | 0.193 | 0.201 | -0.199 | 0.592 | 0.001 | 0.001 | 23984.636 | 14524.279 | 1.0 |
| beta_holiday[0] | -0.333 | 1.042 | -2.357 | 1.711 | 0.006 | 0.007 | 26113.399 | 15965.030 | 1.0 |
| beta_holiday[1] | 0.333 | 1.042 | -1.711 | 2.357 | 0.006 | 0.007 | 26113.399 | 15965.030 | 1.0 |
| beta_workingday[0] | 0.147 | 1.000 | -1.749 | 2.142 | 0.007 | 0.007 | 23442.313 | 14708.537 | 1.0 |
| beta_workingday[1] | -0.147 | 1.000 | -2.142 | 1.749 | 0.007 | 0.007 | 23442.313 | 14708.537 | 1.0 |
| beta_weathersit[2] | -0.014 | 1.011 | -1.960 | 1.984 | 0.006 | 0.007 | 25341.292 | 15910.256 | 1.0 |
| beta_weathersit[1] | -0.150 | 1.012 | -2.137 | 1.848 | 0.007 | 0.007 | 21584.289 | 15200.033 | 1.0 |
| beta_weathersit[3] | 0.164 | 1.011 | -1.802 | 2.172 | 0.007 | 0.007 | 23618.086 | 14431.092 | 1.0 |
| alpha | 0.001 | 0.002 | 0.000 | 0.001 | 0.000 | 0.000 | 13931.740 | 11538.681 | 1.0 |
# Overlay the NB model's posterior-predictive draws on the observed series.
# NOTE(review): `plot_posterior_predictive` is a helper defined earlier in the
# notebook — presumably it reads the posterior_predictive group attached by
# extend_inferencedata; confirm against its definition.
plot_posterior_predictive(nb_trace, 'Negative Binomial Model: All Features With ZeroSumCategorical Priors', model_name = 'nb_reg_posterior_fit_all_feats_zsn', save_fig = False)